* widen Results/log output to 140 characters per line
set linesize 140
* print the return message (including elapsed time) after every command
set rmsg on
* draw all graphs with the s1mono scheme
set scheme s1mono
/******************************************************************************
	Project		:	Army Service in the All-Volunteer Era
	Author(s)	:	Kyle Greenberg	(kyle.greenberg@westpoint.edu)
					Matthew Gudgeon (matthew.gudgeon@westpoint.edu)
					Adam Isen 		(Adam.Isen@treasury.gov)
					Corbin Miller 	(Corbin.Miller@treasury.gov)
					Rich Patterson 	(rich_patterson@byu.edu)
	File Name	:	balance.do
	Description	:	This file calculates and exports figures and tables related
					to balance checks around the RD cutoffs.
*******************************************************************************/

*---- stand-alone setup: only takes effect when master has not set the paths ----*
* To run this file on its own, first change to the directory that holds the
* programs and their subfolders:
*cd ""

* give each path global its default ("<name>/") unless it is already defined
foreach dir in raw data output {
	if "${`dir'}"=="" global `dir' "`dir'/"
}

* create the output folders; capture swallows the error when they already exist
capture mkdir "${output}"
capture mkdir "${output}balance/"

*---- section switches: 1 = run the section, anything else = skip it ----*
local histograms		1
local balance_figs		1
local balance_regs		1
local balance_tabs		1

* indicator variables that define the subsamples looped over below
local stratalist all white black

*---- covariates measured at application, with display labels ----*
local varlist age_days male white black hisp educ_inhs lo_educ educ_hsdip hi_educ
local age_days_l		"Age"
local male_l			"Male"
local white_l			"White"
local black_l			"Black"
local hisp_l			"Hispanic"
local educ_inhs_l		"In High School"
local lo_educ_l			"No HS Diploma"
local educ_hsdip_l		"HS Diploma"
local hi_educ_l			"Some College+"

*---- pre-application outcomes, with display labels ----*
local incomelist wages anywages anyf1040 anyf1098T married
local wages_l			"Earnings"
local anywages_l		"Employment"
local anyf1040_l		"Filed Taxes (1040)"
local anyf1098T_l		"Post-Secondary Attendance"
local married_l			"Married"

*---- numeric display formats (<var>_f) ----*
* three decimals for every variable ...
foreach x in `varlist' `incomelist' {
	local `x'_f %5.3f
}
* ... except wages, which is shown without decimals
local wages_f %2.0f

*---------------------------------*
*---- AFQT density histograms ----*
*---------------------------------*
* Full-sample histograms of scores 12-68 with reference lines at 30.5 and 49.5,
* drawn separately for score fiscal years before 2005 and after 2004, once for
* the first score on file and once for the most recent score.
if `histograms'==1{
	*---- bring in data ----*
	use "${data}army-treasury-analysis", clear
	keep firstafqt firstafqt_fy last_afqt_pctl

	* NOTE(review): the local macro strata is never defined in this section, so it
	* expands to nothing and the exported file names end in "_.pdf". Left as is
	* because other documents may refer to those names - confirm before renaming.
	foreach score in firstafqt last_afqt_pctl {
		* file-name tag: blank for the first score, "lastafqt_" for the most recent
		if "`score'"=="firstafqt"	local tag ""
		else						local tag "lastafqt_"

		foreach era in pre post {
			* both eras are split on the fiscal year of the FIRST score
			if "`era'"=="pre"	local fy_cond "firstafqt_fy<2005"
			else				local fy_cond "firstafqt_fy>2004"

			hist `score' if `fy_cond'&inrange(`score',12,68), frac discrete	///
				xline(30.5 49.5) xlabel(15(5)65) xmtick(12(1)68) ylabel(,angle(0))
			graph export "${output}balance/density_`tag'`era'renorm_`strata'.pdf", as(pdf) replace
		}
	}
}	//end if
else di "*---- skipping histograms ----*"

*----------------------------------*
*---- covariate balance graphs ----*
*----------------------------------*
* For every stratum and every covariate/outcome: collapse to the mean by first
* AFQT score, then plot those means with separate quadratic fits (and confidence
* bands) on 12-30, 31-49 and 50-68, and export one PDF per variable and stratum.
if `balance_figs'==1{
	*---- bring in data ----*
	use "${data}army-treasury-analysis", clear
	* combined education indicators built from the component dummies
	gen lo_educ = (educ_lessthanhs + educ_ged)
	gen hi_educ = (educ_some_coll + educ_coll_grad)
	* use the *_m1 version of each outcome under the plain outcome name
	* (presumably the year before application - confirm against the data build)
	foreach outcome of local incomelist{
		rename `outcome'_m1 `outcome'
	}
	keep pid firstafqt `stratalist' `incomelist' `varlist'
	
	* Snapshot of the full sample. The snapshot number is read from r(snapshot)
	* instead of being assumed to be 1, so snapshots that already exist in the
	* session cannot cause the wrong data to be restored. It has to be captured
	* before -describe-, which overwrites r().
	snapshot save
	local snap_full = r(snapshot)
	desc 
	
	foreach strata of local stratalist{
		snapshot restore `snap_full'
		keep if `strata'==1
	
		* snapshot of the stratum subsample, restored before each collapse
		snapshot save
		local snap_strata = r(snapshot)

		foreach outcome in `incomelist' `varlist'{
			snapshot restore `snap_strata'
			* one row per score: mean of the variable and the sum of -all-
			collapse (mean) `outcome' (sum) all, by(firstafqt)
			tw	(qfitci `outcome' firstafqt if inrange(firstafqt,12,30)	///
				, clc(black) clw(medthick) fc(gs13) alc(white))	///
				(qfitci `outcome' firstafqt if inrange(firstafqt,31,49),	///
				clc(black) clw(medthick) fc(gs13) alc(white))	///
				(qfitci `outcome' firstafqt if inrange(firstafqt,50,68),	///
				clc(black) clw(medthick) fc(gs13) alc(white))	///
				(scatter `outcome' firstafqt if inrange(firstafqt,12,68),	///
				msiz(medsmall) mc(black) mfc(black) mlw(vvthin)),	///
				xtitle("First AFQT Score on File") xlabel(15(5)65) xmtick(12(1)68)	///
				ytitle("``outcome'_l'") ylabel(, angle(0) format(``outcome'_f'))	///
				xline(30.5 49.5, lcolor(black) lpattern(dot)) legend(off)
			graph export "${output}balance/covbalance_qfit_`outcome'_`strata'.pdf", as(pdf) replace
		}
		snapshot erase `snap_strata'
	}
	snapshot erase `snap_full'
}	// end if
else di "*---- skipping balance graphs ----*"

*---------------------------------------*
*---- covariate balance regressions ----*
*---------------------------------------*
* For every stratum: regress each covariate/outcome on the cutoff indicator, the
* k and k^2 terms, their interactions with the indicator, and quarter fixed
* effects, once around the 31 cutoff (scores 12-49) and once around the 50
* cutoff (scores 31-68). The coefficient and robust standard error on the cutoff
* indicator, the sample size, the sample mean, and a joint test across equations
* are saved to ${output}balance/balance-results-<stratum>.dta (one observation).
if `balance_regs'==1{
	*---- bring in data ----*
	use "${data}army-treasury-analysis", clear
	
	* combined education indicators built from the component dummies
	gen lo_educ = (educ_lessthanhs + educ_ged)
	gen hi_educ = (educ_some_coll + educ_coll_grad)

	* use the *_m1 version of each outcome under the plain outcome name
	* (presumably the year before application - confirm against the data build)
	foreach outcome of local incomelist{
		rename `outcome'_m1 `outcome'
	}
	
	keep pid access firstafqt *inst* k31* k50* quarterFE `stratalist' `incomelist' `varlist'
	
	* constant; not referenced again in this file
	gen c=1
	
	* Snapshot of the full sample. The number is read from r(snapshot) instead of
	* being assumed to be 1, and is captured before -describe- overwrites r().
	snapshot save
	local snap_full = r(snapshot)
	desc 
	
	foreach strata of local stratalist{
		snapshot restore `snap_full'
		keep if `strata'==1
		estimates clear
		foreach cov in `varlist' `incomelist'{
			foreach cut in 31 50{
				* estimation window around each cutoff
				if `cut'==31	local window "inrange(firstafqt,12,49)"
				else			local window "inrange(firstafqt,31,68)"
				* quarterFE is written out in full: it is the only quarter variable
				* left after -keep-, and the abbreviation "quarter" stops working
				* under -set varabbrev off-
				local rhs "inst`cut' k`cut' instk`cut' k`cut'_2 instk`cut'_2 ib175.quarterFE"

				* robust fit: source of the reported coefficient, SE, N and mean
				reg `cov' `rhs' if `window', r
				local b_`cov'_`cut' = _b[inst`cut']
				local se_`cov'_`cut' = _se[inst`cut']
				local N_`cov'_`cut' = e(N)
				sum `cov' if e(sample)
				local m_`cov'_`cut' = r(mean)

				* same model without robust SEs, stored as input for -suest-
				reg `cov' `rhs' if `window'
				estimates store `cov'_`cut'
			}
		}
		
		*---- F-tests ----*
		* within the white/black strata the race and ethnicity indicators are
		* left out of the joint test
		if "`strata'"=="all" local covlist "`varlist' `incomelist'"
		else local covlist "age_days male educ_inhs lo_educ educ_hsdip hi_educ `incomelist'"
		foreach cut in 31 50{
			local test_list_`cut' ""
			local suest_list_`cut' ""
			foreach cov in `covlist'{
				local suest_list_`cut' "`suest_list_`cut'' `cov'_`cut'"
				local test_list_`cut' "`test_list_`cut'' [`cov'_`cut'_mean]inst`cut'"
			}
			di "`suest_list_`cut''"
			suest `suest_list_`cut'', vce(robust) coeflegend
			* test_list is only displayed for the log; the test itself is run on
			* the bare coefficient name.
			* NOTE(review): presumably this tests inst`cut' in every combined
			* equation - confirm against the constraint list printed by -test-.
			di "`test_list_`cut''"
			test inst`cut'
			local chi2_`cut' = r(chi2)
			local p_`cut' = r(p)
		}

		*---- output balance regression output ----*
		* one-observation dataset with one variable per saved statistic; stored as
		* double so that counts and estimates are not rounded to float precision
		clear
		set obs 1
		foreach cut in 31 50 {
			foreach cov in `varlist' `incomelist' {
				foreach x in b se N m {
					gen double `x'_`cov'_`cut' = ``x'_`cov'_`cut''
				}
			}
			foreach x in chi2 p {
				gen double `x'_`cut' = ``x'_`cut''
			}
		}
		save "${output}balance/balance-results-`strata'", replace
	}
	* release the full-sample snapshot created by this section
	snapshot erase `snap_full'
}	// end if
else di "*---- skipping balance regressions ----*"

*-----------------------------------------*
*---- output covariate balance tables ----*
*-----------------------------------------*
* Reads the saved balance-results-<stratum> files and writes a LaTeX tabular
* (${output}balance/balance.tex) with columns All / Black / White x 31 / 50
* cutoff: coefficient with significance stars, standard error in parentheses
* underneath, observation counts, and the joint-test p-value.
* In the file write lines _char(38) is "&", _char(40) is "(" and _char(41) is ")".
if `balance_tabs'==1{
	*---- significance stars from |b/se| ----*
	* Each results file holds one observation, so the -if- commands below are
	* evaluated on that observation. Later lines overwrite earlier ones, so the
	* highest threshold that is met wins (1.645 *, 1.96 **, 2.576 ***).
	foreach strata of local stratalist {
		use "${output}balance/balance-results-`strata'", clear
		
		foreach cov in `varlist' `incomelist' {
			foreach cut in 31 50 {
				if abs(b_`cov'_`cut'/se_`cov'_`cut') < 1.645	local star_`cov'_`cut'_`strata' = "" 
				if abs(b_`cov'_`cut'/se_`cov'_`cut') >= 1.645	local star_`cov'_`cut'_`strata' = "*" 
				if abs(b_`cov'_`cut'/se_`cov'_`cut') >= 1.96 	local star_`cov'_`cut'_`strata' = "**" 
				if abs(b_`cov'_`cut'/se_`cov'_`cut') >= 2.576 	local star_`cov'_`cut'_`strata' = "***" 
			}
		}
	}
	
	* stack the results: observation 1 = all, 2 = black, 3 = white
	use "${output}balance/balance-results-all", clear
	append using "${output}balance/balance-results-black"
	append using "${output}balance/balance-results-white"
	
	* close a handle left open by an earlier aborted run, otherwise -file open- fails
	capture file close b
	file open b using "${output}balance/balance.tex", write replace
	file write b "\begin{tabular}{l*{6}c}" _n "\hline \hline" _n
	file write b "& \multicolumn{2}{c}{\underline{All}} & \multicolumn{2}{c}{\underline{Black}} & \multicolumn{2}{c}{\underline{White}} \\" _n
	file write b "& 31 Cutoff & 50 Cutoff & 31 Cutoff & 50 Cutoff & 31 Cutoff & 50 Cutoff \\" _n
	file write b "& (1) & (2) &  (3) &  (4) & (5) &  (6)  \\ \cline{2-7} \\" _n
	file write b "& \multicolumn{6}{c}{\underline{Time of Application}} \\" _n
	foreach x of local varlist{
		* race and ethnicity rows are filled in for the All columns only
		if inlist("`x'","white","black","hisp") {
			file write b "``x'_l'" _char(38) ``x'_f' (b_`x'_31[1]) "`star_`x'_31_all'" _char(38) ``x'_f' (b_`x'_50[1]) "`star_`x'_50_all'" _char(38) "" _char(38) "" _char(38) "" _char(38) "\\" _n
			file write b "" _char(38) _char(40) ``x'_f' (se_`x'_31[1]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[1]) _char(41) _char(38) "" _char(38) "" _char(38) "" _char(38) "\\" _n
		}
		else {
			file write b "``x'_l'" _char(38) ``x'_f' (b_`x'_31[1]) "`star_`x'_31_all'" _char(38) ``x'_f' (b_`x'_50[1]) "`star_`x'_50_all'" _char(38) ``x'_f' (b_`x'_31[2]) "`star_`x'_31_black'" _char(38) ``x'_f' (b_`x'_50[2]) "`star_`x'_50_black'" _char(38) ``x'_f' (b_`x'_31[3]) "`star_`x'_31_white'" _char(38) ``x'_f' (b_`x'_50[3]) "`star_`x'_50_white'" "\\" _n
			file write b "" _char(38) _char(40) ``x'_f' (se_`x'_31[1]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[1]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_31[2]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[2]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_31[3]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[3]) _char(41) "\\" _n
		}
	}
	* observation counts for this panel are taken from the age_days regressions
	file write b "\hline" _n "Observations" _char(38) %11.0fc (N_age_days_31[1]) _char(38) %11.0fc (N_age_days_50[1]) _char(38) %11.0fc (N_age_days_31[2]) _char(38) %11.0fc (N_age_days_50[2]) _char(38) %11.0fc (N_age_days_31[3]) _char(38) %11.0fc (N_age_days_50[3]) "\\" _n
	file write b "\hline \\" _n
	file write b "& \multicolumn{6}{c}{\underline{Year Prior to Application}} \\" _n
	foreach x of local incomelist{
		file write b "``x'_l'" _char(38) ``x'_f' (b_`x'_31[1]) "`star_`x'_31_all'" _char(38) ``x'_f' (b_`x'_50[1]) "`star_`x'_50_all'" _char(38) ``x'_f' (b_`x'_31[2]) "`star_`x'_31_black'" _char(38) ``x'_f' (b_`x'_50[2]) "`star_`x'_50_black'" _char(38) ``x'_f' (b_`x'_31[3]) "`star_`x'_31_white'" _char(38) ``x'_f' (b_`x'_50[3]) "`star_`x'_50_white'" "\\" _n
		* the format must come directly before the expression it formats, i.e.
		* "(" %fmt (exp) ")", the same layout as the standard-error rows above
		file write b "" _char(38) _char(40) ``x'_f' (se_`x'_31[1]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[1]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_31[2]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[2]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_31[3]) _char(41) _char(38) _char(40) ``x'_f' (se_`x'_50[3]) _char(41) "\\" _n
	}
	* observation counts for this panel are taken from the wages regressions
	file write b "\hline" _n "Observations" _char(38) %11.0fc (N_wages_31[1]) _char(38) %11.0fc (N_wages_50[1]) _char(38) %11.0fc (N_wages_31[2]) _char(38) %11.0fc (N_wages_50[2]) _char(38) %11.0fc (N_wages_31[3]) _char(38) %11.0fc (N_wages_50[3]) "\\" _n
	file write b "P-value for Joint Significance" _char(38) %5.3f (p_31[1]) _char(38) %5.3f (p_50[1]) _char(38) %5.3f (p_31[2]) _char(38) %5.3f (p_50[2]) _char(38) %5.3f (p_31[3]) _char(38) %5.3f (p_50[3]) "\\" _n
	file write b "\hline \hline" _n "\end{tabular}" _n
	file close b
}	//end if
else di "*---- skipping balance tables ----*"

* erase every snapshot still held by the session (not only those created in this file)
snapshot erase _all
